package com.imooc.scala.log

import org.apache.spark.sql.{SaveMode, SparkSession}

/**
 * Spark batch job that reads a raw access log as text, converts every line with
 * `AccessConvertUtil.parseLog`, applies the schema `AccessConvertUtil.struct`,
 * and writes the resulting DataFrame as Parquet partitioned by the `day` column.
 */
object SparkStatCleanJob {

  /** Input path used when none is supplied on the command line. */
  private val DefaultInputPath = "nginx_log/access.log"

  /** Output directory used when none is supplied on the command line. */
  private val DefaultOutputPath = "clean"

  /**
   * Job entry point.
   *
   * @param args optional positional arguments:
   *             `args(0)` is the input log path (defaults to `nginx_log/access.log`),
   *             `args(1)` is the output directory (defaults to `clean`).
   *             Running with no arguments uses both defaults.
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.lift(0).getOrElse(DefaultInputPath)
    val outputPath = args.lift(1).getOrElse(DefaultOutputPath)

    // NOTE(review): the master is pinned to local mode here; confirm whether this job
    // is ever meant to be launched on a cluster, where a hard-coded master gets in the way.
    val spark = SparkSession.builder()
      .appName("SparkStatCleanJob")
      .master("local[*]")
      .getOrCreate()

    // Stop the session even when reading, parsing or writing fails, so a failed
    // run does not leave the SparkContext open.
    try {
      val accessRDD = spark.sparkContext.textFile(inputPath)
      // Debug aid: accessRDD.take(10).foreach(println)

      // Presumably parseLog yields one Row per line matching AccessConvertUtil.struct;
      // verify against AccessConvertUtil.
      val accessDF = spark.createDataFrame(
        accessRDD.map(line => AccessConvertUtil.parseLog(line)),
        AccessConvertUtil.struct
      )
      // Debug aids: accessDF.printSchema() / accessDF.show(false)

      // coalesce(1) collapses the data to a single partition before writing, which
      // keeps the number of output files per `day` directory small.
      // Overwrite replaces whatever already exists at the output path.
      accessDF
        .coalesce(1)
        .write
        .format("parquet")
        .mode(SaveMode.Overwrite)
        .partitionBy("day")
        .save(outputPath)
    } finally {
      spark.stop()
    }
  }
}
